# -*- coding: utf-8 -*-
"""Entropy_Auto.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1RGORzGDBPtkkbqzbm2hwnhrFk-RuUfBt
"""

import pandas as pd
import math

# Path of the input dataset. A bare filename like this one is resolved
# relative to the current working directory of the running process.
csv_file = "DataEntropy.csv"

# Load the entire CSV into memory; the rest of the script reads from `df`.
df = pd.read_csv(filepath_or_buffer=csv_file)

# Function to calculate entropy
def calculate_entropy(df, target_column):
    """Return the Shannon entropy, in bits, of the labels in a column.

    Args:
        df: DataFrame holding the data.
        target_column: Name of the column whose value distribution is
            measured.

    Returns:
        The entropy as a float. ``0.0`` is returned when the column has no
        non-missing values or only a single distinct value.

    Raises:
        KeyError: If ``target_column`` is not a column of ``df``.
    """
    class_counts = df[target_column].value_counts()

    # value_counts() leaves out missing values by default, so normalise by the
    # number of labels that were actually counted. Dividing by len(df) would
    # produce probabilities that do not sum to 1 when the column has gaps.
    observed_total = int(class_counts.sum())

    entropy = 0.0
    for class_count in class_counts:
        # A categorical column reports unused categories with a count of 0;
        # they contribute nothing to the entropy and log2(0) is undefined.
        if class_count == 0:
            continue
        probability = class_count / observed_total
        entropy -= probability * math.log2(probability)

    return entropy

# Function to calculate information gain
def calculate_information_gain(df, feature_column, target_column):
    """Return the information gain of splitting ``df`` on a feature.

    The gain is the entropy of ``target_column`` over all rows of ``df``
    minus the weighted sum of the target entropy inside each group of rows
    that share one value of ``feature_column``. Each group is weighted by
    its row count divided by ``len(df)``.

    Args:
        df: DataFrame holding the data.
        feature_column: Name of the column to split on.
        target_column: Name of the label column.

    Returns:
        The information gain as a number.
    """
    baseline_entropy = calculate_entropy(df, target_column)
    conditional_entropy = 0

    # Walk every distinct feature value together with its row count.
    level_sizes = df[feature_column].value_counts()
    for level, level_size in level_sizes.items():
        matching_rows = df[df[feature_column] == level]
        level_entropy = calculate_entropy(matching_rows, target_column)
        conditional_entropy += (level_size / len(df)) * level_entropy

    return baseline_entropy - conditional_entropy

# Print the information gain of each candidate feature column with respect
# to the label column.
target_column = "Target"
features = [
    "Akses (Pusat Kota)",
    "Akses (Jalan Tol)",
    "Akses (Jalan Raya)",
    "Fasum (Pusat Belanja)",
    "Fasum (Rumah Sakit)",
    "Fasum (Stasiun)",
    "Lingkungan (Tidak Banjir)",
    "Lingkungan (Tidak Kumuh)",
    "Lingkungan (Tidak Macet)",
]

for feature in features:
    ig = calculate_information_gain(df, feature, target_column)
    print(f"Information Gain for {feature}: {ig}")



